### load the data
# NOTE(review): setwd() with an absolute, machine-specific path makes the
# script non-portable; prefer project-relative paths (e.g. here::here()).
setwd("/Users/sergazy/Downloads/Fall 2018 courses and all files/Eric's class/08:30:2018 Alternative estimators")
# Restore the saved workspace (provides x1, x2, Y and many intermediate
# objects — see the ls() listing below) used by the rest of the script.
load("dataL1L2.Rdata")
ls()
##   [1] "Area"                 "average_error1"       "average_error2"      
##   [4] "average_error3"       "average_error4"       "beta1"               
##   [7] "beta2"                "betaMLE"              "con"                 
##  [10] "D1"                   "D1_e"                 "D2"                  
##  [13] "D2_e"                 "D2lik"                "dev0"                
##  [16] "dev1"                 "diagonal"             "Dlik"                
##  [19] "err"                  "error1"               "error2"              
##  [22] "error3"               "error4"               "estimatedSigmaSq"    
##  [25] "eta_one"              "eta_two"              "eta1_true"           
##  [28] "eta1fit"              "eta2_true"            "eta2fit"             
##  [31] "F"                    "f_2"                  "f_value_2"           
##  [34] "f_value_e"            "fc"                   "fc_value"            
##  [37] "fisher_info"          "fit"                  "fit_H0"              
##  [40] "fit5"                 "fitc"                 "fitc2"               
##  [43] "fitH0"                "fitZ2"                "h_m"                 
##  [46] "H0betaMLE"            "height"               "i"                   
##  [49] "Illit"                "Illit2"               "Income"              
##  [52] "lik"                  "logArea"              "mean_true"           
##  [55] "mean_true_vector"     "mu_i"                 "muhat_i"             
##  [58] "Murder"               "N"                    "NewtonRaphsonOptim"  
##  [61] "NROptimNoQuadEta1"    "NROptimNoQuadEta2"    "ones"                
##  [64] "optimized"            "p_value_e"            "Pop"                 
##  [67] "predictedFromGLM"     "predictedFromSLM"     "predictionSimulation"
##  [70] "pval1"                "r"                    "refit3"              
##  [73] "residualsGLM"         "residualsSLM"         "RSS"                 
##  [76] "RSS_H0"               "RSS4"                 "RSS5"                
##  [79] "RSSc"                 "RSSc2"                "sighatsq"            
##  [82] "sigma_glm"            "sigma_i"              "sigma_lm"            
##  [85] "sigma_true"           "sigmahat_i"           "sigmasquare"         
##  [88] "singingvoice"         "SS_glm"               "SS_lm"               
##  [91] "SSres"                "SSresH0"              "SSresH02"            
##  [94] "t"                    "T"                    "t1"                  
##  [97] "T2"                   "v"                    "x"                   
## [100] "x1"                   "X1"                   "x2"                  
## [103] "X2"                   "xsquared"             "y"                   
## [106] "Y"                    "y_sim"                "y_sims_glm"          
## [109] "y_sims_lm"            "Z"                    "z1"                  
## [112] "z2"                   "Z2"
#a
# Fit the ordinary least-squares (L2) regression of Y on x1 and x2;
# Y is the dependent variable given in the question. The data come from
# the workspace loaded above.
fit <- lm(Y ~ x1 + x2)
summary(fit)
## 
## Call:
## lm(formula = Y ~ x1 + x2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.5518  -1.3366   0.1614   1.3546  10.9500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.2550     1.1196   2.907  0.00555 ** 
## x1           -1.1369     0.1590  -7.150 4.85e-09 ***
## x2            1.8009     0.2932   6.143 1.64e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.79 on 47 degrees of freedom
## Multiple R-squared:  0.6653, Adjusted R-squared:  0.651 
## F-statistic:  46.7 on 2 and 47 DF,  p-value: 6.77e-12
# Extract residuals by their full component name: `fit$res` works only via
# fragile `$` partial matching; `fit$residuals` is explicit and safe.
resid <- fit$residuals
# Inspect the residual distribution.
hist(resid)

## [plot of chunk unnamed-chunk-1: histogram of the least-squares residuals]

# Normal Q-Q plot of the residuals with a reference line.
qqnorm(resid)
# Keep the (x, y) Q-Q coordinates; `plot =` only worked through partial
# argument matching — the real argument name is `plot.it`.
u <- qqnorm(resid, plot.it = FALSE)
qqline(resid)  # the tails deviate from the line — looks a bit off from normal

## [plot of chunk unnamed-chunk-1: normal Q-Q plot of the residuals with reference line]

#b
# L1 (least-absolute-deviations) objective for the linear model
#   Y = b[1] + b[2] * x1 + b[3] * x2 + error.
#
# Args:
#   b:  numeric length-3 coefficient vector (intercept, x1 slope, x2 slope).
#   x1: numeric predictor vector.
#   x2: numeric predictor vector, same length as x1.
#   Y:  numeric response vector, same length as x1.
# Returns: scalar sum of absolute residuals, to be minimised by optim().
L1 <- function(b, x1, x2, Y) {
  sum(abs(Y - (b[1] + b[2] * x1 + b[3] * x2)))
}
# Starting values: the least-squares estimates (coef() avoids the fragile
# `fit$coef` partial match).
par0 <- coef(fit)
# Minimise the L1 objective with optim (default Nelder-Mead). The data
# arguments are named so they are forwarded unambiguously through ... to L1().
par <- optim(par0, L1, gr = NULL, x1 = x1, x2 = x2, Y = Y)
par
## $par
## (Intercept)          x1          x2 
##    2.613557   -1.037200    1.887199 
## 
## $value
## [1] 117.1935
## 
## $counts
## function gradient 
##      266       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL
# Now compare par (L1 estimates) with par0 (least-squares estimates).

# Build the design matrix and compute the fitted values under the L1 fit.
X <- cbind(1, x1, x2)
dim(X)                    # expect 50 x 3: intercept column plus two predictors
X %*% as.matrix(par$par)  # L1 fitted values, one per observation
##             [,1]
##  [1,] -6.9459701
##  [2,] -6.1897725
##  [3,] -0.9818339
##  [4,]  3.7647635
##  [5,]  2.8429985
##  [6,] -5.7717323
##  [7,]  4.7146451
##  [8,]  0.4380312
##  [9,]  3.8230497
## [10,]  7.8173365
## [11,] 14.1231268
## [12,] -5.3258099
## [13,] -1.7146461
## [14,]  3.9720566
## [15,] -7.1711405
## [16,]  2.0589541
## [17,]  2.5577729
## [18,]  4.0088094
## [19,]  0.3219342
## [20,] -0.8355570
## [21,] -5.1616104
## [22,]  7.8719187
## [23,] -1.9167159
## [24,]  3.9194666
## [25,] -4.8364955
## [26,]  3.3485642
## [27,]  5.7473657
## [28,]  4.4916913
## [29,]  1.3412889
## [30,] -3.5367874
## [31,] -6.2153116
## [32,]  6.1140190
## [33,]  0.5588167
## [34,] -3.0514979
## [35,] -3.8661249
## [36,] -2.1059421
## [37,]  2.8109264
## [38,] -4.7110091
## [39,] 11.7033723
## [40,]  1.8591111
## [41,]  3.6940842
## [42,] -1.0406509
## [43,] -3.5759672
## [44,]  6.3762887
## [45,] -2.2509685
## [46,]  3.9570232
## [47,] 10.0364562
## [48,] -4.1892696
## [49,] -5.6742568
## [50,]  3.2143553
# Residuals from the L1 fit, and their elementwise difference from the
# least-squares residuals `resid`.
resid2 <- Y - X %*% as.matrix(par$par)
resid2 - resid
##              [,1]
##  [1,] -0.30630563
##  [2,] -0.34176348
##  [3,] -0.43082438
##  [4,]  0.54175129
##  [5,] -0.09429347
##  [6,] -0.35282281
##  [7,]  0.13895855
##  [8,]  0.41428492
##  [9,]  0.11574931
## [10,]  0.39741513
## [11,] -0.52380385
## [12,] -0.45493020
## [13,]  0.18603513
## [14,]  0.57080533
## [15,] -0.32453892
## [16,]  0.22320400
## [17,]  0.45140238
## [18,]  0.21368415
## [19,]  0.40224079
## [20,]  0.07414331
## [21,] -0.33901358
## [22,]  0.24018879
## [23,] -0.35201754
## [24,]  0.39694684
## [25,] -0.10475720
## [26,]  0.47198112
## [27,] -0.52303690
## [28,]  0.54030514
## [29,]  0.48060844
## [30,] -0.53969691
## [31,] -0.38589301
## [32,]  0.11120279
## [33,] -0.17591784
## [34,] -0.30617021
## [35,] -0.05674753
## [36,] -0.09831705
## [37,]  0.52815705
## [38,] -0.09487287
## [39,] -0.71916164
## [40,] -0.23211383
## [41,]  0.21613444
## [42,] -0.59660019
## [43,] -0.18291671
## [44,]  0.32647229
## [45,] -0.47330195
## [46,]  0.11265737
## [47,]  0.05744420
## [48,] -0.26116772
## [49,] -0.27097552
## [50,]  0.45731558
# Distribution of the differences between the L1 and L2 residuals.
hist(resid2-resid)

## [plot of chunk unnamed-chunk-1: histogram of resid2 - resid]

# Average difference between the L1 and L2 residuals is close to zero.
mean(resid2 - resid)
## [1] -0.01745745
# This comparison was exploratory only; it is not needed for the answer.

#c
# Parametric bootstrap under NORMAL errors: simulate M data sets from the
# fitted model, re-estimate the coefficients by both least squares (L2) and
# least absolute deviations (L1), and accumulate each estimator's squared
# error relative to the "true" coefficients coef(fit).
X <- cbind(1, x1, x2)
M <- 1000
b1sim <- matrix(0, M, 3)  # L1 estimates, one row per replication
b2sim <- matrix(0, M, 3)  # L2 estimates
error1 <- 0
error2 <- 0
# Residual standard error of the original fit; use the stored residual df
# (n - p = 47) instead of the magic constant 47.
S <- sqrt(sum(fit$residuals^2) / fit$df.residual)
n <- nrow(X)  # sample size, instead of the hard-coded 50
for (i in seq_len(M)) {
  Usim <- rnorm(n) * S
  Ysim <- X %*% coef(fit) + Usim
  fitsim <- lm(Ysim ~ x1 + x2)
  b2sim[i, ] <- coef(fitsim)
  parsim <- optim(coef(fitsim), L1, gr = NULL, x1 = x1, x2 = x2, Y = Ysim)
  b1sim[i, ] <- parsim$par
  error1 <- error1 + sum((b1sim[i, ] - coef(fit))^2)
  error2 <- error2 + sum((b2sim[i, ] - coef(fit))^2)
}
# Average squared error per replication; divide by M, not a hard-coded 1000.
average_error1 <- error1 / M  # L1 estimator
average_error2 <- error2 / M  # L2 estimator

#d
# Part (d) calls for an analytical derivation (a proof); it is worked out on
# paper rather than in code, so nothing is computed here.

#e
# Same simulation as (c), but with heavy-tailed t(3) errors instead of
# normal errors, to compare the robustness of the L1 and L2 estimators.
X <- cbind(1, x1, x2)
M <- 1000
b3sim <- matrix(0, M, 3)  # L1 estimates
b4sim <- matrix(0, M, 3)  # L2 estimates
error3 <- 0
error4 <- 0
# Residual standard error of the original fit (df = n - p, stored in the
# lm object rather than the magic constant 47).
S <- sqrt(sum(fit$residuals^2) / fit$df.residual)
n <- nrow(X)
for (i in seq_len(M)) {
  Usim <- rt(n, 3) * S
  Ysim <- X %*% coef(fit) + Usim
  fitsim <- lm(Ysim ~ x1 + x2)
  b4sim[i, ] <- coef(fitsim)
  parsim <- optim(coef(fitsim), L1, gr = NULL, x1 = x1, x2 = x2, Y = Ysim)
  b3sim[i, ] <- parsim$par
  error4 <- error4 + sum((b4sim[i, ] - coef(fit))^2)
  error3 <- error3 + sum((b3sim[i, ] - coef(fit))^2)
}
average_error4 <- error4 / M  # L2 (least squares)
average_error3 <- error3 / M  # L1 — smaller here is expected, not strange:
                              # L1 is more efficient under heavy-tailed
                              # t(3) errors than least squares.

#f
# Rejection-rate check: simulate under true beta = (2, -1, 1) with t(3)
# errors and count how often the t-test of H0: beta1 = -1 rejects at the
# 5% level.
X <- cbind(1, x1, x2)
M <- 1000
b5sim <- matrix(0, M, 3)
b6sim <- matrix(0, M, 3)
count <- 0
beta_f <- c(2, -1, 1)  # true coefficients used to generate the data
S <- sqrt(sum(fit$residuals^2) / fit$df.residual)
n <- nrow(X)
for (i in seq_len(M)) {
  Usim <- rt(n, 3) * S
  Ysim <- X %*% beta_f + Usim
  fitsim <- lm(Ysim ~ x1 + x2)
  b6sim[i, ] <- coef(fitsim)  # L2 estimate
  parsim <- optim(coef(fitsim), L1, gr = NULL, x1 = x1, x2 = x2, Y = Ysim)
  b5sim[i, ] <- parsim$par    # L1 estimate

  # t-statistic for H0: beta1 = -1. The contrast must be c(0, 1, 0) to pick
  # out the x1 coefficient's variance — the original c(0, 0.7, 0) shrank the
  # standard error and inflated every t-statistic by 1/0.7.
  S_f <- vcov(fitsim)
  coef_f <- coef(fitsim)
  con <- c(0, 1, 0)
  t_f <- (coef_f[2] + 1) / sqrt(drop(t(con) %*% S_f %*% con))
  # Two-sided p-value: use abs(t_f) so negative statistics can also reject,
  # and the residual df (n - 3 = 47) rather than n = 50.
  p_f <- 2 * (1 - pt(abs(t_f), df = fitsim$df.residual))
  if (p_f < 0.05) {
    count <- count + 1
  }
}
print(count)
## [1] 71